/**
 DR Radio 2 is developed by Jacob Nordfalk, Hanafi Mughrabi and Frederik Aagaard.
 Some parts of the code are loosely based on Sveriges Radio Play for Android.

 DR Radio 2 for Android is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License version 2 as published by
 the Free Software Foundation.

 DR Radio 2 for Android is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 See the GNU General Public License for more details.

 You should have received a copy of the GNU General Public License along with
 DR Radio 2 for Android.  If not, see <http://www.gnu.org/licenses/>.

 */

package dk.dr.radio.data;

import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.HashMap;

import dk.dr.radio.diverse.Log;


public class EoDiverse {

  /**
   * Replaces HTML character references in a string with the characters they stand for.
   *
   * <p>Two kinds of reference are decoded:
   * <ul>
   *   <li>named references listed in {@link #ESCAPES} ({@code &amp;quot;}, {@code &amp;amp;},
   *       {@code &amp;lt;}, {@code &amp;gt;} and the ISO-8859-1 names such as
   *       {@code &amp;oslash;});</li>
   *   <li>numeric references, decimal ({@code &amp;#248;}) or hexadecimal
   *       ({@code &amp;#xF8;}), for any valid Unicode code point, including
   *       supplementary characters such as {@code &amp;#x1F600;}.</li>
   * </ul>
   * Anything that is not a well-formed, known reference (unknown name, missing
   * {@code ';'}, signed or out-of-range number) is copied to the result unchanged.
   *
   * <p>Based on:
   * http://stackoverflow.com/questions/994331/java-how-to-decode-html-character-entities-in-java-like-httputility-htmldecode
   *
   * @param input the text to unescape; may be {@code null}
   * @return the unescaped text; the very same {@code input} instance when nothing was
   *         replaced, and {@code null} when {@code input} is {@code null}
   */
  public static final String unescapeHtml3(final String input) {
    if (input == null) {
      return null;
    }

    final int len = input.length();
    StringBuilder out = null;   // created lazily so that escape-free input is returned as-is
    int copiedUpTo = 0;         // index of the first input char not yet copied to 'out'

    int amp = input.indexOf('&');
    // An '&' in the very last position cannot start a reference, so stop there.
    while (amp >= 0 && amp < len - 1) {
      final int nameStart = amp + 1;
      final boolean numeric = input.charAt(nameStart) == '#';
      final int maxLength = numeric ? MAX_NUMERIC_ESCAPE : MAX_ESCAPE;

      // Look for the terminating ';' within the allowed reference length.
      final int limit = Math.min(len, nameStart + maxLength + 1);
      int semi = nameStart;
      while (semi < limit && input.charAt(semi) != ';') {
        semi++;
      }

      int codePoint = -1;
      CharSequence named = null;
      final boolean wellFormed = semi < limit && semi >= nameStart + MIN_ESCAPE;
      if (wellFormed) {
        if (numeric) {
          codePoint = parseCodePoint(input, nameStart + 1, semi);
        } else {
          named = lookupMap.get(input.substring(nameStart, semi));
        }
      }

      final boolean decoded = numeric ? codePoint >= 0 : named != null;
      if (!decoded) {
        // Not a reference we understand: keep the '&' literally and resume right after it.
        amp = input.indexOf('&', nameStart);
        continue;
      }

      if (out == null) {
        out = new StringBuilder(len);
      }
      out.append(input, copiedUpTo, amp);
      if (numeric) {
        // appendCodePoint emits a surrogate pair for code points above U+FFFF.
        out.appendCodePoint(codePoint);
      } else {
        out.append(named);
      }

      // Skip past the ';' and continue with the rest of the input.
      copiedUpTo = semi + 1;
      amp = input.indexOf('&', copiedUpTo);
    }

    if (out == null) {
      return input;
    }
    out.append(input, copiedUpTo, len);
    return out.toString();
  }

  /**
   * Parses the number part of a numeric character reference.
   *
   * @param input       the text being unescaped
   * @param digitsStart index of the first char after {@code '#'}; an {@code 'x'} or
   *                    {@code 'X'} here selects hexadecimal
   * @param digitsEnd   index of the terminating {@code ';'} (exclusive end of the number)
   * @return the code point, or -1 if the text is not an unsigned number denoting a
   *         valid Unicode code point
   */
  private static int parseCodePoint(final String input, int digitsStart, final int digitsEnd) {
    int radix = 10;
    final char marker = input.charAt(digitsStart);
    if (marker == 'x' || marker == 'X') {
      digitsStart++;
      radix = 16;
    }
    if (digitsStart >= digitsEnd) {
      return -1;
    }

    // Integer.parseInt accepts a leading sign, which is not allowed in a character reference.
    final char lead = input.charAt(digitsStart);
    if (lead == '+' || lead == '-') {
      return -1;
    }

    try {
      final int value = Integer.parseInt(input.substring(digitsStart, digitsEnd), radix);
      return Character.isValidCodePoint(value) ? value : -1;
    } catch (NumberFormatException ex) {
      // Not a number in the selected radix: the caller leaves the text untouched.
      return -1;
    }
  }

  /** Pairs of {character, entity name} for the supported named references. */
  private static final String[][] ESCAPES = {
      {"\"",     "quot"}, // double-quote
      {"&",      "amp"}, // ampersand
      {"<",      "lt"}, // less-than
      {">",      "gt"}, // greater-than

      // ISO-8859-1 characters and their named HTML 3.x equivalents.
      {"\u00A0", "nbsp"}, // non-breaking space
      {"\u00A1", "iexcl"}, // inverted exclamation mark
      {"\u00A2", "cent"}, // cent sign
      {"\u00A3", "pound"}, // pound sign
      {"\u00A4", "curren"}, // currency sign
      {"\u00A5", "yen"}, // yen sign = yuan sign
      {"\u00A6", "brvbar"}, // broken bar = broken vertical bar
      {"\u00A7", "sect"}, // section sign
      {"\u00A8", "uml"}, // diaeresis = spacing diaeresis
      {"\u00A9", "copy"}, // copyright sign
      {"\u00AA", "ordf"}, // feminine ordinal indicator
      {"\u00AB", "laquo"}, // left-pointing double angle quotation mark = left pointing guillemet
      {"\u00AC", "not"}, // not sign
      {"\u00AD", "shy"}, // soft hyphen = discretionary hyphen
      {"\u00AE", "reg"}, // registered trademark sign
      {"\u00AF", "macr"}, // macron = spacing macron = overline = APL overbar
      {"\u00B0", "deg"}, // degree sign
      {"\u00B1", "plusmn"}, // plus-minus sign = plus-or-minus sign
      {"\u00B2", "sup2"}, // superscript two = superscript digit two = squared
      {"\u00B3", "sup3"}, // superscript three = superscript digit three = cubed
      {"\u00B4", "acute"}, // acute accent = spacing acute
      {"\u00B5", "micro"}, // micro sign
      {"\u00B6", "para"}, // pilcrow sign = paragraph sign
      {"\u00B7", "middot"}, // middle dot = Georgian comma = Greek middle dot
      {"\u00B8", "cedil"}, // cedilla = spacing cedilla
      {"\u00B9", "sup1"}, // superscript one = superscript digit one
      {"\u00BA", "ordm"}, // masculine ordinal indicator
      {"\u00BB", "raquo"}, // right-pointing double angle quotation mark = right pointing guillemet
      {"\u00BC", "frac14"}, // vulgar fraction one quarter = fraction one quarter
      {"\u00BD", "frac12"}, // vulgar fraction one half = fraction one half
      {"\u00BE", "frac34"}, // vulgar fraction three quarters = fraction three quarters
      {"\u00BF", "iquest"}, // inverted question mark = turned question mark
      {"\u00C0", "Agrave"}, // uppercase A, grave accent
      {"\u00C1", "Aacute"}, // uppercase A, acute accent
      {"\u00C2", "Acirc"}, // uppercase A, circumflex accent
      {"\u00C3", "Atilde"}, // uppercase A, tilde
      {"\u00C4", "Auml"}, // uppercase A, umlaut
      {"\u00C5", "Aring"}, // uppercase A, ring
      {"\u00C6", "AElig"}, // uppercase AE ligature
      {"\u00C7", "Ccedil"}, // uppercase C, cedilla
      {"\u00C8", "Egrave"}, // uppercase E, grave accent
      {"\u00C9", "Eacute"}, // uppercase E, acute accent
      {"\u00CA", "Ecirc"}, // uppercase E, circumflex accent
      {"\u00CB", "Euml"}, // uppercase E, umlaut
      {"\u00CC", "Igrave"}, // uppercase I, grave accent
      {"\u00CD", "Iacute"}, // uppercase I, acute accent
      {"\u00CE", "Icirc"}, // uppercase I, circumflex accent
      {"\u00CF", "Iuml"}, // uppercase I, umlaut
      {"\u00D0", "ETH"}, // uppercase Eth, Icelandic
      {"\u00D1", "Ntilde"}, // uppercase N, tilde
      {"\u00D2", "Ograve"}, // uppercase O, grave accent
      {"\u00D3", "Oacute"}, // uppercase O, acute accent
      {"\u00D4", "Ocirc"}, // uppercase O, circumflex accent
      {"\u00D5", "Otilde"}, // uppercase O, tilde
      {"\u00D6", "Ouml"}, // uppercase O, umlaut
      {"\u00D7", "times"}, // multiplication sign
      {"\u00D8", "Oslash"}, // uppercase O, slash
      {"\u00D9", "Ugrave"}, // uppercase U, grave accent
      {"\u00DA", "Uacute"}, // uppercase U, acute accent
      {"\u00DB", "Ucirc"}, // uppercase U, circumflex accent
      {"\u00DC", "Uuml"}, // uppercase U, umlaut
      {"\u00DD", "Yacute"}, // uppercase Y, acute accent
      {"\u00DE", "THORN"}, // uppercase Thorn, Icelandic
      {"\u00DF", "szlig"}, // lowercase sharp s (eszett), German
      {"\u00E0", "agrave"}, // lowercase a, grave accent
      {"\u00E1", "aacute"}, // lowercase a, acute accent
      {"\u00E2", "acirc"}, // lowercase a, circumflex accent
      {"\u00E3", "atilde"}, // lowercase a, tilde
      {"\u00E4", "auml"}, // lowercase a, umlaut
      {"\u00E5", "aring"}, // lowercase a, ring
      {"\u00E6", "aelig"}, // lowercase ae ligature
      {"\u00E7", "ccedil"}, // lowercase c, cedilla
      {"\u00E8", "egrave"}, // lowercase e, grave accent
      {"\u00E9", "eacute"}, // lowercase e, acute accent
      {"\u00EA", "ecirc"}, // lowercase e, circumflex accent
      {"\u00EB", "euml"}, // lowercase e, umlaut
      {"\u00EC", "igrave"}, // lowercase i, grave accent
      {"\u00ED", "iacute"}, // lowercase i, acute accent
      {"\u00EE", "icirc"}, // lowercase i, circumflex accent
      {"\u00EF", "iuml"}, // lowercase i, umlaut
      {"\u00F0", "eth"}, // lowercase eth, Icelandic
      {"\u00F1", "ntilde"}, // lowercase n, tilde
      {"\u00F2", "ograve"}, // lowercase o, grave accent
      {"\u00F3", "oacute"}, // lowercase o, acute accent
      {"\u00F4", "ocirc"}, // lowercase o, circumflex accent
      {"\u00F5", "otilde"}, // lowercase o, tilde
      {"\u00F6", "ouml"}, // lowercase o, umlaut
      {"\u00F7", "divide"}, // division sign
      {"\u00F8", "oslash"}, // lowercase o, slash
      {"\u00F9", "ugrave"}, // lowercase u, grave accent
      {"\u00FA", "uacute"}, // lowercase u, acute accent
      {"\u00FB", "ucirc"}, // lowercase u, circumflex accent
      {"\u00FC", "uuml"}, // lowercase u, umlaut
      {"\u00FD", "yacute"}, // lowercase y, acute accent
      {"\u00FE", "thorn"}, // lowercase thorn, Icelandic
      {"\u00FF", "yuml"}, // lowercase y, umlaut
  };

  /** Shortest reference text accepted between '&' and ';' (e.g. "lt", "#9"). */
  private static final int MIN_ESCAPE = 2;
  /** Longest named reference accepted between '&' and ';' (e.g. "plusmn"). */
  private static final int MAX_ESCAPE = 6;
  /** Longest numeric reference accepted between '&' and ';' ("#x10FFFF" / "#1114111"). */
  private static final int MAX_NUMERIC_ESCAPE = 8;

  /** Entity name to replacement text; filled once in the static initializer below. */
  private static final HashMap<String, CharSequence> lookupMap;
  static {
    lookupMap = new HashMap<String, CharSequence>();
    for (final String[] pair : ESCAPES) {
      lookupMap.put(pair[1], pair[0]);
    }
  }

}
